State Results

# clear labels for versions
versions <- tibble(
  version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
  vlabel = factor(
    c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
    "$\\beta$ Centered at Survey Value",
    "$Pr(S_1|untested)$ Centered at Survey Value",
    "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
    ),
    levels = c(
        "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
        "$\\beta$ Centered at Survey Value",
         "$Pr(S_1|untested)$ Centered at Survey Value",
        "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
    ) )
)


# read state-level results
targets_dir <- here("_targets")

state_res <- tar_read(state_v1, 
                   store =targets_dir) %>% 
  bind_rows(
    tar_read(state_v2,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v3, 
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v4, 
             store =targets_dir)
   )%>%
   bind_rows(
    tar_read(state_v5,
             store =targets_dir)
  )%>%
   bind_rows(
    tar_read(state_v6,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v7,
             store =targets_dir)
  )

covidestim <- tar_read(covidestim_biweekly_state,
                        store =targets_dir)

dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
codes <- read_csv(here('data/demographic/statecodes.csv'))

# last biweek for first model is biweek 24
last_biweek_old_model <- dates %>% 
  group_by(biweek) %>%
  summarize(mindate=min(date),maxdate=max(date))  %>%
  filter("2021-11-30" >= mindate & "2021-11-30" <= maxdate)


cat("Last two week interval with previous model was biweek", last_biweek_old_model$biweek,
    "\ndates", paste0("(", last_biweek_old_model$mindate, ", ",
                    last_biweek_old_model$maxdate, ")"))
## Last two week interval with previous model was biweek 24 
## dates (2021-11-19, 2021-12-02)
end_old_model <- last_biweek_old_model$maxdate


state_res <- state_res %>%
  rename(state=fips) %>%
  left_join(versions) %>%
  mutate(state=toupper(state)) %>%
  left_join(covidestim, relationship= "many-to-many") %>%
  left_join(dates, relationship = "many-to-many")  %>%
  rename(name=state_name) %>%
  group_by(biweek) %>%
  mutate(mindate=min(date),
         maxdate=max(date)) %>%
  ungroup()


if(filter_versions){
  state_res <- state_res %>%
  filter(version %in% c("v3", "v5", "v6", "v7"))  %>%
    mutate(vlabel=case_when(
      version == "v7" ~  "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
      version == "v5" ~ "$\\beta$ Centered at Survey Value",
      version =="v6" ~  "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
      version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value"
    ))

}
theme_c <- function(...){ 
   # font <- "Helvetica"   #assign font family up front
    theme_bw() %+replace%    #replace elements we want to change
    
    theme(
      
      
      #text elements
      plot.title = element_text(             #title
                   size = 16,                #set font size
                   face = 'bold',            #bold typeface
                   hjust = .5,
                   vjust = 3),               
      
      plot.subtitle = element_text(          #subtitle
                   size = 12,
                   hjust = .5,
                   face = 'italic',
                   vjust = 3),               #font size
      
      axis.title = element_text(             #axis titles
                   size = 12),               #font size
      
      axis.text = element_text(              #axis text
                   size = 9),
      legend.text = element_text(size = 12),
      # t, r, b, l
      plot.margin = unit(c(1,.5,.5,.5), "cm"),
      legend.position = "right",
      strip.text.x = element_text(size = 11, face = "bold", color="white"),
      strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
      strip.background = element_rect(fill = "#3E3D3D")
      ) %+replace%
      theme(...)
   
}

Summarizing Concordance with Covidestim

Below, we see that although the implementation where \(\Pr(S_1|untested)\) was at the survey as well as the implementation did not vary by state or date, the mean simulation interval width was actually lower, indicating the greater concordance was not merely a result of wider bounds.

state_res %>%
  select(-c(date)) %>%
  distinct() %>%
  mutate(interval_length =exp_cases_ub - exp_cases_lb) %>%
  group_by(vlabel) %>%
  summarize(mean_interval_length = mean(interval_length)) %>%
  arrange(desc(mean_interval_length)) %>%
  kableExtra::kbl()
vlabel mean_interval_length
\(Pr(S_1|untested)\) and \(\beta\) Centered at Survey Values 338438.0
\(\beta\) Centered at Survey Value 315176.5
\(Pr(S_1|untested)\) Centered at Survey Value 302452.5
\(Priors\,Do\,Not\,Vary\,by\,State\,and\,Date\) 146420.7
state_res_differenced <- state_res %>%
  select(-date) %>% distinct() %>%
  mutate(testpos=positive/total) %>%
  group_by(state, vlabel) %>%
  mutate(exp_cases_median_differenced =
           exp_cases_median - lag(exp_cases_median,
                                  n =1,
                                  order_by = biweek),
         infections_differenced = infections - lag(infections,
                                                   n = 1,
                                                   order_by=biweek),
         testpos_differenced = lag(testpos, n=1, order_by=biweek),
         positive_differenced =lag(positive,n=1,order_by=biweek)) %>%
  select(state, name, vlabel, contains('differenced'), biweek) %>%
  filter(!is.na(exp_cases_median_differenced)) %>%
  filter( biweek <=25)


# if lag is NEGATIVE, covidestim LEADS wastewater
# if lag is POSITIVE, covidestim LAGS wastewater
# since by the documentation, 'the lag k value returned by ccf(x, y) estimates
# the correlation between x[t+k] and y[t].'

get_max_lag_by_version <- function(input_df_for_version,
                                   varnames = c("infections_differenced",
                                  "exp_cases_median_differenced"),
                                  comparison = "bias corrected") {
  
  var1 <- varnames[1]
  var2 <-varnames[2]
  lab <- unique(input_df_for_version$vlabel)
  
  cross_correlations <- ccf(
    pull(input_df_for_version, var1),
     pull(input_df_for_version, var2), 
                            plot = FALSE,
    lag.max=3)
  

  ccf_res <- tibble(lag = cross_correlations$lag[,,1], 
                  correlation=cross_correlations$acf[,,1]) 
  
  m <- ccf_res %>% filter(correlation == max(correlation))
  
  tibble(vlabel = unique(input_df_for_version$vlabel),
         fips = unique(input_df_for_version$state),
         max_lag = m$lag,
         max_corr = m$correlation,
         comparison = comparison,
         name = unique(input_df_for_version$name))

}


cross_corr <- state_res_differenced %>%
  group_by(state, vlabel) %>%
  group_split() %>%
  map_df(get_max_lag_by_version) 
  
states_with_lag <- cross_corr %>%
  filter(max_lag >  0 & vlabel == "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$") %>%
  # remove those where lag appears to be due to low correlation rather 
  # than true lag
  filter(max_corr > .6) %>% 
  pull(fips) %>%
  unique()



cross_corr_testpos <- state_res_differenced %>%
  filter(biweek >=6 & biweek <= 25) %>%
  # positive tests and positivity are not dependent on vlabel; use this
  # one because it goes to earlier dates
  filter(vlabel=="$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$") %>%
  group_by(state, vlabel) %>%
  group_split() %>%
  map_df(~get_max_lag_by_version(.x,
    varnames = c("positive_differenced", 
                 "testpos_differenced"))) %>%
  filter(max_lag > 0) %>%
  pull(fips) %>%
  unique()

# length(states_with_lag)
# setdiff(states_with_lag, cross_corr_testpos) %>% length()
# corrected %>%
#   mutate(in_interval = case_when(
#     exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
#     exp_cases_lb  > infections  ~ "Below Interval",
#     exp_cases_ub < infections ~ "Above Interval"
#   )) %>%
#   filter(!is.na(in_interval)) %>%
#   group_by(in_interval, state, vlabel) %>%
#   summarize(n_per_county=n()) %>%
#   group_by(vlabel, in_interval) %>%
#   summarize(n_per_version = sum(n_per_county)) %>%
#   group_by(vlabel) %>%
#   mutate(total = sum(n_per_version)) %>%
#   ungroup() %>%
#   mutate(prop=n_per_version/total)



by_version <- state_res %>%
 # filter(biweek >=6) %>% 
  select(-date) %>%
  distinct() %>% 
  mutate(in_interval = case_when(
    exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
    exp_cases_lb  > infections  ~ "Below Interval",
    exp_cases_ub < infections ~ "Above Interval"
  )) %>%
  filter(!is.na(in_interval)) %>%
  group_by(in_interval, vlabel) %>%
  summarize(n=n()) %>%
  group_by(vlabel) %>%
  mutate(total = sum(n)) %>%
  mutate(prop=n/total) 

labels <- by_version %>%
  arrange(prop) %>%
  pull(vlabel) %>%
  as.character() %>%
  unique()


by_version %>%
  mutate(in_interval = factor(in_interval, 
                             levels = c("Above Interval",
                                        "In Interval",
                                        "Below Interval"
                                        ))) %>%
   ggplot(aes(x= fct_reorder(vlabel,prop,max), 
             fill = in_interval, y =prop)) + 
  geom_bar(stat="identity",position="stack") +
  theme_c() +
  coord_flip()+ scale_fill_manual(values=c("In Interval" = "#79D2AF",
                             "Above Interval" = "#D2AF79",
                      "Below Interval"="#7997D2"),
                       breaks = c("Below Interval",
                               "In Interval",
                               "Above Interval")) +
  labs(x="", 
       y= "Proportion",
       fill = "Covidestim Median:",
       title = "Proportion of Simulation Intervals Where Covidestim Median\nWas Within, Above, or Below the Median") +
  theme_c() +
  theme_c(legend.position="right",
          legend.title = element_text(face="bold", size = 15),
          legend.text= element_text( size = 15),
          legend.spacing.y = unit(3, 'pt'),
          axis.text.y = element_text(size = 17, hjust=1)) +
  theme(plot.title = element_text(size = 20),
          plot.subtitle = element_text(size=18, 
                                       face='italic', 
                                       margin=margin(4,0,4,0)),
        axis.text.x=element_text(size=12),
        axis.title = element_text(size=14)) +
  scale_x_discrete(labels = (TeX(labels)))

ggsave(here('figures/concordance-covidestim-state.pdf'))
by_version %>%
  group_by(vlabel) %>%
  filter(in_interval == "In Interval") %>%
  mutate(n_interval = n) %>%
  ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
  geom_text(aes(label=round(prop,3), x= prop+.03), 
             angle=0) +
  geom_bar(stat="identity", fill="#596281") +
  scale_y_discrete(breaks= unique(by_version$vlabel),
                   labels=function(x)TeX(x)) +
  scale_x_continuous(expand=c(0,0), 
                     limits=c(0,.9), n.breaks=7)  +
  theme_c(axis.text.y = element_text(size = 13, hjust=1),
          axis.title.x = element_text(size=14),
          axis.text.x=element_text(size=10)) +
  labs(y="",
       x="Proportion Containing the Covidestim Median",
       title="Proportion Containing the Covidestim Median",
       subtitle = "By Implementation")

Simulation Intervals Over Time

all_versions <- state_res %>% 
  group_by(state) %>%
  summarize(state_n=n_distinct(version)) %>%
  filter(state_n == max(state_n)) %>%
  pull(state)
state_res %>%
   filter(state %in% sample(unique(all_versions),5))  %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub),
             alpha = .8,
             show.legend=FALSE,
             fill="#596281") +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Compare Versions

All Versions

subset <- state_res %>%
  filter(vlabel ==  "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$") 

# pal <- c("#247C90", "red")
# 
# names(pal) <- c(as.character(unique(subset$vlabel)), "Covidestim")


state_res %>%
   filter(state %in% sample(unique(all_versions),5))  %>% 
  filter(version %in% c("v2", "v5", "v6", "v7")) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill=vlabel),
             alpha = .8,
             show.legend=FALSE #,
           #  fill="#596281"
             ) +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Wu et al.’s Priors versus CTIS Priors (that do not vary)

colors_labs <- state_res %>%
  filter(version %in% c("v1","v7")) %>%
  pull(vlabel) %>% unique()

state_res %>%
  filter(version %in% c("v1","v7") & state %in% sample(unique(.$state),15)) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill=vlabel),
             alpha = .6
             ) +
  facet_wrap(~name, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed),
               nrow=3),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  viridis::scale_fill_viridis(option="magma", begin=.1,end=.9, discrete=TRUE,
                              breaks =(colors_labs),
                              labels=TeX(as.character(colors_labs)))  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State",
       fill = "") 

Compare States

# comp_state_pal <- c("#247C90", "red")
# 
# names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")
# 
# 
# vlab_for_pal <- unique(subset$vlabel) %>% as.character()

color_for_bias <- pal[[which(names(pal)=="$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$")]]

comp_state_pal<- c(color_for_bias,"red")
names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")


subset %>%
   # mutate(vlabel = gsub("$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$",
   #                     "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", as.character(vlabel))) %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = 'Probabilistic Bias Analysis'),
             alpha = .8,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .6) +
  geom_vline(xintercept=end_old_model,
             linetype=2,
             color="darkred",
             linewidth=.5) +
  facet_wrap(~name, ncol=4, scales = "free",
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3.5 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=7),
          axis.text.y = element_text(size=4.5),
          legend.position = "top",
          legend.text=element_text(size=12),
          strip.text.x=element_text(size=11,
                                    face='plain',
                                    color='white')) +
  scale_fill_manual(values =comp_state_pal, name='')  +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by State")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here('figures/intervals-and-covidestim-all-states.pdf'))

Compare States that Have a Lag

States where the Covidestim estimates are lagged relative to the probabilistic bias counts are Arkansas, California, Florida, Georgia, Indiana, Iowa, North Carolina, South Carolina, Texas.

# states_with_lag

lagged_fig_a <- subset %>%
    filter(state %in% states_with_lag & biweek <= 25 & biweek >=6) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = 'Probabilistic Bias Analysis'),
             alpha = .8,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .6) +
  # geom_vline(xintercept=end_old_model,
  #            linetype=2,
  #            color="darkred",
  #            linewidth=.5) +
  facet_wrap(~name, ncol=3, scales = "free",
              labeller= labeller(
                vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=7),
          axis.text.y = element_text(size=4.5),
          legend.position = "top",
          legend.text=element_text(size=12),
          strip.text.x=element_text(size=11,
                                    face='plain',
                                    color='white')) +
  theme(plot.title = element_text(face="bold",
                                 hjust = 0, 
                                 size = 13,
                                 margin=margin(0,0,0,0))) +
  scale_fill_manual(values =comp_state_pal, name='')  +
  labs(y = "Estimated Infections",
       title = paste0(
         "(a) Estimated Infections by State in States with a Lag")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))
lagged_fig_a

ggsave(here('figures/intervals-and-covidestim-states-lag.pdf'))

Test Positivity and Positive Tests in States with a Lag

############################################
# POSITIVE TESTS AND TEST POSITIVITY
############################################

 subset %>%
  filter(biweek<=25) %>%
  filter(state %in% states_with_lag) %>%
    mutate(testpos = positive/total)
pltlist <- subset %>%
  filter(biweek >= 6 & biweek<=25) %>%
  filter(state %in% states_with_lag) %>%
    mutate(testpos = positive/total) %>%
  group_by(across(-date)) %>%
  summarize(date=min(date) + days(7)) %>%
  group_by(state) %>%
  mutate(adj = (1/max(testpos)) * max(positive)) %>%
  group_split() %>%
  map(~ {
    adj <-unique(.x$adj)
    .x %>%
      ggplot(aes(x=date))+
    geom_line(aes(y=positive, color = 'Positive Tests'),
              alpha=.8) +
      geom_point(aes(y=positive, color = 'Positive Tests'),
              alpha=.5,
              size=.9) +
    geom_line(aes(y=testpos*adj, color='Test Positivity'),
              alpha=.8
              #,linetype=2
              ) +
      geom_point(aes(y=testpos*adj, color='Test Positivity'),
              alpha=.5,
              size=.9) +
    #scale_linetype_discrete(breaks = c(1,1,2)) +
    scale_color_manual(values=c(
                                "Positive Tests"= "#279143",
                                "Test Positivity"= '#E38E4F')) +
    guides(color = guide_legend(override.aes = list(linetype = c(1,2),
                                                    linewidth=c(2,2)))) +
    theme_c() +
    theme(axis.title = element_text(size=10),
          axis.text =element_text(size=9),
          legend.position="none") +
    scale_y_continuous(labels=comma, 
                       sec.axis = sec_axis(
                         ~./adj,
                         name = 'Test Positivity')) +
    labs(color ='',
         y="Positive Tests",
         x= "Date") +
      facet_wrap(~name) +
      scale_x_date(date_labels="%b %Y",
                   date_breaks = "3 months")
        
  })

legend <- get_legend(
  pltlist[[1]] +
    theme(legend.position = "top",
          legend.text=element_text(size=13, margin=margin(6,3,0,0)))
)


title_gg <- ggplot() +
  labs(
    #title = "(b) Comparing Trends in Positive Tests and Test Positivity"
    title = "") +
  theme(plot.title=element_text(face="bold",
                                hjust = 0,
                                size = 13))


plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3) 

lagged_fig_b <- plot_grid(title_gg, legend, plt, rel_heights=c(.03, .03,.94), ncol=1)

# lagged_fig_b <- plot_grid( legend, plt, rel_heights=c(.05,.95), ncol=1)

lagged_fig_b

States Where Covidestim Estimates Appear to be Lagged Relative to Probabilistic Bias Counts

lag_title <- ggplot() +
  labs(title = "States Where Covidestim Estimates Appear to be Lagged\n Relative to Probabilistic Bias Counts") +
  theme(plot.title=element_text(face="bold", hjust = .5, size=18))

plot_grid(# lag_title, 
          lagged_fig_a + labs(title=""), 
          lagged_fig_b,
          labels=c(
           # "",
            "(a) Estimated Infections by State in States with a Lag",
            "(b) Comparing Trends in Positive Tests and Test Positivity in States with a Lag"),
        #  rel_heights = c(.04, .47,.49),
          label_x=0,
         # vjust=c(1.5,1.5,.1),
        vjust=c(1.5,.1),
          hjust=-.05,
          ncol=1)

ggsave(here('figures/testpos-lag.pdf'))
########################################################
# PERCENT CHANGE IN POSITIVE TESTS AND TEST POSITIVITY
########################################################

 subset %>%
  filter(biweek<=25) %>%
  filter(state %in% states_with_lag) %>%
    mutate(testpos = positive/total)
pltlist <- subset %>%
  filter(biweek >= 6 & biweek<=25) %>%
  filter(state %in% states_with_lag) %>%
    mutate(testpos = positive/total) %>%
  group_by(across(-date)) %>%
  # put date in midpoint
  summarize(date=min(date) + days(7)) %>%
  group_by(state) %>%
  mutate(pct_change_testpos = (testpos-lag(testpos, n=1, order_by=biweek)) /
                                 lag(testpos, n=1,
                                     order_by=biweek),
         pct_change_positive = (positive - lag(positive,
                                               n=1,
                                               order_by=biweek)) / 
                                 lag(positive,
                                               n=1,
                                               order_by=biweek)) %>%
 #  mutate(adj = (1/max(testpos)) * max(positive)) %>%
  group_split() %>%
  map(~ {
   # adj <-unique(.x$adj)
    .x %>%
      ggplot(aes(x=date))+
    geom_line(aes(y=pct_change_positive, color = 'Positive Tests'),
              alpha=.8) +
      geom_point(aes(y=pct_change_positive, color = 'Positive Tests'),
              alpha=.5,
              size=.9) +
    geom_line(aes(y=pct_change_testpos, color='Test Positivity'),
              alpha=.8
              #,linetype=2
              ) +
      geom_point(aes(y=pct_change_testpos, color='Test Positivity'),
              alpha=.5,
              size=.9) +
    #scale_linetype_discrete(breaks = c(1,1,2)) +
    scale_color_manual(values=c(
                                "Positive Tests"= "#279143",
                                "Test Positivity"= '#E38E4F')) +
    guides(color = guide_legend(override.aes = list(linetype = c(1,2),
                                                    linewidth=c(2,2)))) +
    theme_c() +
    theme(axis.title = element_text(size=10),
          axis.text =element_text(size=9),
          legend.position="none") +
    # scale_y_continuous(labels=comma, 
    #                    sec.axis = sec_axis(
    #                      ~./adj,
    #                      name = 'Test Positivity')) +
    labs(color ='',
         y="Positive Tests",
         x= "Date") +
      facet_wrap(~name) +
      scale_x_date(date_labels="%b %Y")
        
  })

legend <- get_legend(
  pltlist[[1]] +
    theme(legend.position = "top",
          legend.text=element_text(size=16))
)


title_gg <- ggplot() + 
  labs(title = "Comparing Trends in Positive Tests and Test Positivity") + 
  theme(plot.title=element_text(face="bold",
                                hjust = .5, 
                                size = 18,
                                margin =margin(5,0,2,0)))


plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3) 

plot_grid(title_gg, legend, plt, rel_heights=c(.03, .03,.94), ncol=1)

########################################################
# PERCENT CHANGE IN POSITIVE TESTS AND TOTAL TESTS
########################################################

pltlist <- subset %>%
  filter(biweek >= 6 & biweek<=25) %>%
  filter(state %in% states_with_lag) %>%
    mutate(testpos = positive/total) %>%
  group_by(across(-date)) %>%
  # put date in midpoint
  summarize(date=min(date) + days(7)) %>%
  group_by(state) %>%
  mutate(pct_change_testpos = (total-lag(total, n=1, order_by=biweek)) /
                                 lag(total, n=1,
                                     order_by=biweek),
         pct_change_positive = (positive - lag(positive,
                                               n=1,
                                               order_by=biweek)) / 
                                 lag(positive,
                                               n=1,
                                               order_by=biweek)) %>%
 #  mutate(adj = (1/max(testpos)) * max(positive)) %>%
  group_split() %>%
  map(~ {
   # adj <-unique(.x$adj)
    .x %>%
      ggplot(aes(x=date))+
    geom_line(aes(y=pct_change_positive, color = 'Positive Tests'),
              alpha=.8) +
      geom_point(aes(y=pct_change_positive, color = 'Positive Tests'),
              alpha=.5,
              size=.9) +
    geom_line(aes(y=pct_change_testpos, color='Total Tests'),
              alpha=.8
              #,linetype=2
              ) +
      geom_point(aes(y=pct_change_testpos, color='Total Tests'),
              alpha=.5,
              size=.9) +
    #scale_linetype_discrete(breaks = c(1,1,2)) +
    scale_color_manual(values=c(
                                "Positive Tests"= "#279143",
                                "Total Tests"= '#E38E4F')) +
    guides(color = guide_legend(override.aes = list(linetype = c(1,2),
                                                    linewidth=c(2,2)))) +
    theme_c() +
    theme(axis.title = element_text(size=10),
          axis.text =element_text(size=9),
          legend.position="none") +
    # scale_y_continuous(labels=comma, 
    #                    sec.axis = sec_axis(
    #                      ~./adj,
    #                      name = 'Test Positivity')) +
    labs(color ='',
         y="Total Tests",
         x= "Date") +
      facet_wrap(~name) +
      scale_x_date(date_labels="%b %Y")
        
  })

legend <- get_legend(
  pltlist[[1]] +
    theme(legend.position = "top",
          legend.text=element_text(size=16))
)


title_gg <- ggplot() + 
  labs(title = "Comparing Trends in Positive Tests and Total Tests") + 
  theme(plot.title=element_text(face="bold",
                                hjust = .5, 
                                size = 18,
                                margin =margin(5,0,2,0)))


plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3) 

plot_grid(title_gg, legend, plt, rel_heights=c(.03, .03,.94), ncol=1)

Ratio of Estimated to Observed

subset %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb/positive,
             ymax = exp_cases_ub/positive,
             fill = vlabel),
             alpha = .9,
             show.legend=FALSE,
             fill= "#247C90") +
  facet_wrap(~state, ncol=5, 
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=6),
          axis.text.y = element_text(size=8),
          legend.position = "top") +
  scale_fill_manual(values =pal,
                    labels = TeX(names(pal)),
                    name='',
                    breaks="Covidestim")  +
  labs(y = "Ratio of Estimated Infections to Observed",
       x="",
       title = paste0("Ratio of Estimated Infections to Observed by State")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here('figures/ratio-estimated-to-observed-all-states.pdf'))

Comparing States at Specific Two-week Intervals

During Alpha Wave

max_val <- subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13,27)) %>%
  mutate(ratio=exp_cases_ub/positive) %>%
  pull(ratio) %>%
  max()
  
  
plt1 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 7) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  # geom_point(aes(y=state_name,x=exp_cases_median/positive),
  #            color = "darkred", size=.8, alpha=.5) +
  theme_c() + 
  theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Alpha Wave",
       subtitle = "March 26, 2021 through April 8, 2021") +
  xlim(0,max_val+2)

plt1

During Delta Wave

subset %>%
  mutate(ratio = exp_cases_median/positive) %>%
  group_by(biweek) %>%
  summarize(mean = mean(ratio),
            mindate=min(date),
            maxdate=max(date)) %>%
  arrange(desc(mean)) 
plt2 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 13) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  # geom_point(aes(y=state_name,x=exp_cases_median/positive),
  #            color = "darkred", size=.8, alpha=.5) +
  theme_c() + 
 theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Delta Wave",
       subtitle = "June 18, 2021 through July 1, 2021") +
  xlim(0,max_val+2)

plt2

During Omicron Wave

subset %>% 
  left_join(codes) %>%
  filter(biweek == 27) %>%
  pull(date) %>% 
  range()


subset %>% 
  left_join(codes) %>%
  filter(biweek == 13) %>%
  pull(date) %>% 
  range()


plt3 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 27) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  theme_c() + 
  theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Omicron Wave",
       subtitle = "December 31, 2021 through January 14, 2022") +
  xlim(0,max_val+2)

plt3

All Waves Together

plt <- plot_grid(plt1,plt2, plt3, nrow=1, align="none", labels="auto", label_size=19)

title <- ggplot() +
  labs(title = "Ratio of Estimated Infections to Observed Infections") +
  theme_c(plot.title=element_text(size=25))

plot_grid(title, plt, nrow=2, 
          rel_heights= c(.05,.95))

ggsave(here('figures/ratio-estimated-to-observed-multiple-waves.pdf'))
cat("Time interval with the highest ratio of estimated cases to observed: ")
## Time interval with the highest ratio of estimated cases to observed:
subset %>%
  group_by(biweek) %>%
  mutate(ratio=exp_cases_median/positive) %>%
  summarize(mean_ratio=mean(ratio),
         date = paste(range(date),collapse=" to ")) %>%
  slice_max(n=1, order_by=mean_ratio)
subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  select(-date) %>%
  distinct() %>%
  group_by(state) %>%
  mutate(ord = empirical_testpos[which(biweek==7)]) %>%
  mutate(biweek=factor(biweek)) %>%
  ggplot(aes(y=fct_reorder(state,ord),
             x=empirical_testpos,fill=biweek)) +
  geom_bar(stat="identity",position="dodge")+
  theme_c() +
  theme_c() +
  labs(title = "Testing Positivity by State",
       y="",
       x="Biweekly Testing Positivity",
       fill="") +
  scale_x_continuous(n.breaks=10)

subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  group_by(biweek) %>%
  mutate(daterange= paste(format(min(date), "%B %d %Y"),
                          "to", 
                          format(max(date), "%B %d %Y")),
         daterange=factor(
           daterange,
           levels=c(
             "March 26 2021 to April 08 2021",
             "June 18 2021 to July 01 2021",
             "December 31 2021 to January 14 2022"))) %>%
  select(-date) %>%
  distinct() %>%
  group_by(state) %>%
  mutate(testrate=total/population,
         ord = testrate[which(biweek==7)]) %>%
  mutate(biweek=factor(biweek)) %>%
  ggplot(aes(y=fct_reorder(name,ord,.desc=TRUE),
             x=testrate,fill=daterange)) +
  geom_bar(stat="identity",position="dodge") +
  theme_c() +
  labs(title = "Testing Rate by State",
       y="",
       x="Biweekly Testing Rate",
       fill="") +
  scale_x_continuous(n.breaks=10)

compare_testrates <- subset %>%
  mutate(testrate=total/population) %>%
  select(-date) %>%
  distinct() %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  select(biweek,state,testrate) %>%
  pivot_wider(names_from=biweek, values_from=testrate, names_prefix="biweek_") %>%
  mutate(omicron_over_alpha = biweek_27/biweek_7,
         omicron_over_delta = biweek_27/biweek_13) %>%
  select(state,contains("omicron")) %>%
  pivot_longer(c(omicron_over_alpha,omicron_over_delta)) %>%
  group_by(name) %>%
  summarize(mean_val = mean(value))

cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_alpha") %>% pull(mean_val), "times the testing rate during this time interval in the alpha wave.") 
## The testing rate during this two-week interval during the omicron wave was 2.440557 times the testing rate during this time interval in the alpha wave.
cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_delta") %>% pull(mean_val), "times the testing rate during this time interval in the delta wave.") 
## The testing rate during this two-week interval during the omicron wave was 4.931176 times the testing rate during this time interval in the delta wave.

Rank Ratio of Estimated to Observed Over Time

ranks <- subset %>%
  mutate(ratio = exp_cases_median/positive,
         tested = total/population) %>%
  # one observation per biweek per state
  select(biweek, date, ratio, name,tested) %>%
  group_by(biweek,ratio,name,tested) %>%
  summarize(date=min(date)) %>%
  # rank for each time interval
  group_by(biweek) %>%
  mutate(rank = rank(ratio),
         rank_tested=rank(-tested))


r <- ranks %>%
  mutate(rank_group = case_when(
    rank <= 10 ~  "Top 10",
    rank > 41 ~ "Bottom 10",
    TRUE ~ "Middle" )) %>%
  group_by(name, rank_group) %>%
  mutate(n=n()) %>%
  group_by(name) %>%
  mutate(total =n(),
         max_group = rank_group[which.max(n)]) %>%
  filter( max(n) >24) %>%
  filter(max_group != "Middle")  %>%
  ungroup()


top_10 <- r %>% filter(rank_group=="Top 10") %>%
  arrange(desc(n)) %>% pull(name) %>%
  unique() %>%paste(collapse=", ")

cat("States that consistently have the lowest ratios:", top_10)
## States that consistently have the lowest ratios: Rhode Island, Massachusetts, District of Columbia, Alaska, New York, Vermont
bottom_10 <- r %>% filter(rank_group=="Bottom 10") %>%
  arrange(desc(n)) %>% pull(name) %>%
  unique() %>%paste(collapse=", ")


cat("States that consistently have the highest ratios:", bottom_10)
## States that consistently have the highest ratios: Mississippi, South Dakota, Oklahoma, Nebraska, Tennessee
end_labs <- r %>%
  ungroup() %>%
  filter(date==max(date)) %>%
  mutate(date = date + days(10)) %>%
  select(name, rank, date) %>%
  ungroup()
##############################
# RANK PLOT OVER TIME
##############################
# 
# set.seed(999)
# 
# pal1 <-viridis_pal(option="viridis", end = .9, begin=.3)(3)
# pal2 <- viridis_pal(option="rocket", end = .9, begin=.3)(2)
# pal3 <- viridis_pal(option="magma", end = .9, begin=.3)(2)
# pal4 <- viridis_pal(option="mako", end = .8, begin=.3)(2)
# pal5 <- viridis_pal(option="cividis", end = .9, begin=.1)(3)
# 
# 
# rankpal <- c(pal1, pal2, pal3,pal4,pal5)
# indexes <- sample.int(length(rankpal), replace=FALSE)
# rankpal <- rankpal[indexes]


set.seed(123)

rankpal <- c("#b4ddd4", "#7b2c31", "#65d04b",
             "#da239b", "#b7d165", "#453fbc", 
             "#658114", "#af3fe8", "#ffb947",
             "#0a60a8", "#f87945", "#16d0ae")

rankpal <- c("#851657", "#be3acd", "#19a71f",
             "#824598", "#ed820a", "#BB8E37",
             "#974810", "#1f84ec", "#d02023",
             "#059dc5", "#f23387", "#16d0ae")

indexes <- sample.int(length(rankpal), replace=FALSE)
rankpal <- rankpal[indexes]



r %>%
 # filter(rank_group!="Middle") %>%
  ggplot() +
  geom_point(aes(x=date,y=rank, 
             group=name,
             color= name),
             size=.5) +
  geom_line(aes(x=date,y=rank, 
             group=name,
             color= name)) +
 # facet_wrap(~max_group) +
  ggrepel::geom_text_repel( aes(x= date-days(4),
                y = rank,
                color=name,
                label = name),
           end_labs,
           min.segment.length=2,
           nudge_y=0,
           hjust=0,
           size=3,
           direction="y",
           force_pull=9,
           box.padding=.15) +
  theme_c(legend.position="none",
          axis.text.x = element_text(angle =0,size=14)) +
  # theme(plot.subtitle =element_text(size=18),
  #       axis.title.x=element_text(size=16),
  #       axis.title.y=element_text(size=16)) +
 # scale_color_brewer(palette="Dark2")  +
  scale_color_manual(values=rankpal) +
  scale_x_date(date_breaks="2 months", date_labels = "%b %Y",
               limits = c(ymd("2021-01-01"),
                          ymd("2022-04-15"))) +
  labs(y = "Rank of Ratio",
       title = "Rank of Estimated Infections to Observed Infections Over Time",
       subtitle = "For States Ranked in the Top or Bottom 10\nfor More Than 80% of Time Intervals",
       x="Date") +
  scale_y_reverse(breaks=seq(1,51, by=9))

Summer Testing Rate

subset %>% 
  mutate(testrate = total/population) %>%
  group_by(biweek) %>% 
  mutate(m = median(testrate),
            date=min(date)) %>%
  ggplot(aes(x=date,y=testrate, group=state)) +
  geom_line(alpha=.2) +
  geom_line(aes(y=m, color='Median'), size=2) +
  labs(y = "Total Number of Tests\nOver Population Size",
       title ="Testing Rate Over Time",
       x="") +
  theme_c() +
  theme(axis.title.x = element_text(size=16),
          axis.title.y  = element_text(size=16),
        legend.position='top') +
  geom_vline(xintercept = ymd("2021-07-16"), linetype = 2, linewidth=.5) +
  geom_vline(xintercept = ymd("2021-06-18"), linetype = 2, linewidth=.5) +
  scale_x_date(date_breaks="2 months",
               date_labels = "%b %Y") +
  scale_color_manual(values=c(Median="darkred"),name='')

ggsave(here('figures/testrate-low-summer.pdf'), height=6,width=12, dpi=400)

Variant Data

m <- state_res %>%
  filter(state == "MI" & version=="v7") %>%
  pull(exp_cases_ub) %>%
  max()


variant <- tar_read(variant, store =targets_dir)

varpal <- c("#7BD8DA","#709f0f", "#e71761", "#9245c8", 
            "#97481c", "#5054b1", "#DA7BA8", "#26B392")

state_res %>%
  filter(state == "MI" & version %in% c("v7")) %>%
 # filter(state %in% sample(corrected$state,5)) %>%
 # filter(biweek >=6) %>%
  group_by(biweek) %>%
  mutate(xmin = min(date),
         xmax = max(date)) %>%
  ungroup() %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill =vlabel),
           #  fill = "#3E3D3D",
             alpha = .5,
             show.legend=FALSE) +
  scale_x_date(date_breaks = "2 months", 
               date_labels = "%b %Y") +
  theme_c(legend.position="top") +
  geom_line(aes(x=week, y=share*m,
                color =variant_category,
                group=variant_category),
            data=variant,
            linewidth=1.3) +
  # scale_fill_manual(values =pal,
  #                   labels = TeX(names(pal)),
  #                    breaks='Covidestim',
  #                   name='')  +
  scale_y_continuous(sec.axis = sec_axis( trans=~./m, name="Variant Proportion"),
                     labels = comma) +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by Version in Michigan"),
       color = "SARS-CoV-2 Variant",
       x="Date") +
  guides(color = guide_legend(override.aes = list(linewidth =3),
                              ncol=2,title.position="top")) +
 # scale_color_brewer(palette="Accent") 
  scale_color_manual(values=varpal) +
  scale_fill_manual(values=pal)

ggsave(here('figures/michigan_variant.pdf'), dpi=400)

Comparing Overlap

Weaknesses in this approach:

  • it doesn’t distinguish between a very small interval being covered (i.e. when there are very few cases) and a larger interval being covered (i.e. when there are many cases)
  • it doesn’t consider the width of the PB interval
state_overlaps <- subset %>%
  select(-date) %>%
  distinct() %>%
  mutate(lower_overlap = map2_dbl(infections.lo, exp_cases_lb, ~max(.x,.y)),
         upper_overlap = map2_dbl(infections.hi, exp_cases_ub, ~min(.x,.y)),
         overlap = case_when(
           upper_overlap-lower_overlap > 0 ~   upper_overlap-lower_overlap,
           TRUE ~ 0
         )) %>%
  select(contains('overlap'), name, biweek, 
         population, positive, total, exp_cases_lb,
         exp_cases_ub, contains('infections')) %>%
  mutate(overlap_not_norm = overlap,
         overlap = overlap/population,
         fraction_overlap = overlap_not_norm/(infections.hi-infections.lo)) 



#############
# BY STATE
#############
state_overlaps %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap)) %>%
  ggplot(aes(y= fct_reorder(name, mean_overlap),
             x =  mean_overlap)) +
  geom_bar(stat="identity") +
  theme_c() +
  labs(y="",
       x="Mean Overlap",
       title = "Mean Overlap with Covidestim by State",
       subtitle = "Overlap Normalized by Width of Covidestim Interval")

##########################
# BY TWO-WEEK INTERVAL
##########################
state_overlaps %>%
  group_by(biweek) %>%
  summarize(mean_overlap=mean(fraction_overlap)) %>%
  left_join(dates) %>%
  group_by(biweek) %>%
  summarize(date = median(date),
            mean_overlap=unique(mean_overlap)) %>%
  ggplot(aes(x=date,
             y =  mean_overlap)) +
  geom_bar(stat="identity", alpha =.7) +
  geom_vline(xintercept=end_old_model,linetype=2,
             color='darkred',
             linewidth=1.1) +
  theme_c() +
  labs(y="",
       x="Mean Overlap",
       title = "Mean Overlap with Covidestim by Time Interval",
       subtitle = "Overlap Normalized by Width of Covidestim Interval") +
  scale_x_date(date_breaks="2 months", date_labels="%b %Y")

##########################
# BY TWO-WEEK INTERVAL
##########################
# state_overlaps %>%
#   mutate(posrate=positive/total) %>%
#   filter(name %in% sample(unique(state_overlaps$name), 5)) %>%
#   ggplot(aes(x=posrate, y = fraction_overlap)) +
#   geom_point(alpha=.8) +
#   theme_c() +
#   # facet_wrap(~name, scales="free_y") +
#   labs(x = "Positivity Rate", y = "Overlap") +
#   scale_y_continuous(labels=comma)
set.seed(123)
state_sample <- sample(unique(state_overlaps$name), 16)

state_overlaps %>%
  mutate(posrate=positive/total) %>%
 # filter(name %in% state_sample) %>%
  ggplot(aes(x=posrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  facet_wrap(~name, scales="free_y") +
  labs(x = "Positivity Rate", y = "Overlap",
       title = "Comparing Fraction of Interval Covered by Positivity Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(posrate=positive/total) %>%
#  filter(name %in% state_sample) %>%
  ggplot(aes(x=posrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
 # facet_wrap(~name, scales="free_y") +
  labs(x = "Positivity Rate", y = "Overlap",
       title = "Comparing Fraction of Interval Covered by Positivity Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(testrate=total/population) %>%
  filter(name %in% state_sample) %>%
  ggplot(aes(x=testrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  facet_wrap(~name, scales="free") +
  labs( x= "Total Tests / Population Size", 
        y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
        title = "Comparing Fraction of Interval Covered by Testing Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(testrate=total/population) %>%
  filter(name %in% state_sample) %>%
  ggplot(aes(x=testrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  labs( x= "Total Tests / Population Size", 
        y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
        title = "Comparing Fraction of Interval Covered by Testing Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

Relationship Between Agreement and Cumulative Incidence (Pre-Omicron)

state_population_link <- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv"
state_pop <- read_csv(state_population_link)
statecodes <- read_csv(here("data/demographic/statecodes.csv"))
  
state_pop <- state_pop %>%
    left_join(statecodes, by = c("NAME" = "state")) %>%
    select(population = POPESTIMATE2019,
           state = code,
           name = NAME) %>%
    filter(!is.na(state))
  
state_deaths <- tar_read(state_deaths, store=targets_dir)


set.seed(123)
state_sample <- sample(unique(state_deaths$state), 5)

state_deaths %>%
#  filter(state %in% state_sample) %>%
  ggplot(aes(x=date,y=deaths/cases)) +
  geom_line() +
  facet_wrap(~state) +
  theme_c() +
  geom_vline(xintercept=end_old_model, color='darkred', linetype=2)

comp_deaths <- state_deaths %>%
  mutate(omicron=ifelse(date <= mdy("12-02-2021"),
                        "pre_omicron", "omicron")) %>%
  group_by(state, omicron) %>%
  summarize(cumulative_deaths = sum(deaths),
            cumulative_cases= sum(cases)) %>%
  pivot_wider(names_from = omicron, 
              values_from = c(cumulative_deaths, cumulative_cases)) %>%
  mutate(pre_deaths_over_cases = cumulative_deaths_pre_omicron/cumulative_cases_pre_omicron,
         omicron_deaths_over_cases = cumulative_deaths_omicron/cumulative_cases_omicron) 



comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  pivot_longer(contains("over_cases")) %>%
  group_by(state) %>%
  mutate(pre = value[which(name=="pre_deaths_over_cases")]) %>%
  mutate(name = case_when(
    grepl("omicron", name) ~ "Omicron",
    grepl("pre", name) ~ "Before Omicron"
  )) %>%
  ggplot(aes(y=fct_reorder(state,pre),  x= value, fill=name)) +
  geom_bar(stat='identity', position='dodge') +
  theme_c(legend.title = element_text(face="bold", size=14)) +
  labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
       y="",
       fill = "Time Period",
       title = "Total Deaths Over Total Cases in the Time Period Before Omicron\n Compared to During Omicron") +
  scale_x_continuous(n.breaks=7)

comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  mutate(pct_change =( omicron_deaths_over_cases -pre_deaths_over_cases ) / pre_deaths_over_cases ) %>% 
  ggplot(aes(y=fct_reorder(state,pct_change),  x= pct_change)) +
  geom_bar(stat='identity', position='dodge') +
  theme_c(legend.title = element_text(face="bold", size=11),
          axis.text.x=element_text(size=14),
          plot.subtitle = element_text(size=16,
                                       face="plain", 
                                       margin=margin(0,0,4,0))) +
  labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
       y="",
       fill = "Time Period",
       title = TeX("Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
       subtitle="Compared to During Omicron") +
  scale_x_continuous(labels=scales::percent)

comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  mutate(pct_change =( omicron_deaths_over_cases - pre_deaths_over_cases ) / pre_deaths_over_cases ) %>%
  left_join(state_overlaps, by = c('state' = 'name')) %>%
  group_by(state) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            pct_change=unique(pct_change)) %>%
  ggplot(aes(x=pct_change,y=mean_overlap)) +
  geom_point(size=2) +
  theme_c(  plot.subtitle = element_text(size=16,
                                       face="plain", 
                                       margin=margin(0,0,4,0))) +
  scale_x_continuous(labels=scales::percent) +
  labs(x="Percent Change", y = "Mean Overlap\n(By State)", 
        title = TeX("Relationship Between the Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
       subtitle="Compared to During Omicron versus the Mean Overlap")

comp_deaths %>%
  rename(name=state) %>%
  ungroup() %>%
  left_join(state_pop[,c("name", "population")]) %>%
  mutate(frac_infected = cumulative_cases_pre_omicron/population) %>%
  left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
  filter(biweek >= 25) %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            frac_infected=unique(frac_infected)) %>%
  ggplot(aes(x=frac_infected, y =mean_overlap)) +
  geom_point() +
  theme_c()

covidestim_link <- "https://covidestim.s3.us-east-2.amazonaws.com/latest/state/estimates.csv"
  
covidestim <- read_csv(covidestim_link)
  
# join data from each source to include dates before and after 2021-12-02
covidestim <- covidestim %>%
    select(date, contains("infections"), state) %>%
    filter(year(date) > 2020) %>%
    mutate(week = week(date), year = year(date))


covidestim %>%
  group_by(state) %>%
  summarize(infections=sum(infections))%>%
  rename(name=state) %>%
  left_join(state_pop[,c("name", "population")]) %>%
  mutate(frac_infected = infections/population) %>%
  left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
  filter(biweek >= 25) %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            frac_infected=unique(frac_infected)) %>%
  ggplot(aes(x=frac_infected, y =mean_overlap)) +
  geom_point() +
  theme_c()